import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import itertools
import datetime
import matplotlib.cm as cm
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
%matplotlib inline
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Load the workbook and scale both percentage columns by 100.
df = pd.read_excel('Homeless in U.S.updated.xlsx')
for pct_col in ('Percentage 1', 'Percentage 2 - USICH'):
    df[pct_col] = df[pct_col] * 100

# Discard the stray 'Unnamed: 5' column, then peek at the raw data.
df = df.drop(columns=['Unnamed: 5'])
df.head()
df.describe()

# Keep only the first 51 rows and exclude the 'D.C.' entry.
df = df.iloc[:51]
df = df.loc[df['State'] != 'D.C.']
df.info()
df
#!pip install squarify
import squarify
# Lookup table: full state / territory name -> two-letter abbreviation.
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands': 'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

# Independent copy of df carrying an extra 'Statecode' column; names that are
# missing from the lookup table map to NaN.
df_copy = df.assign(Statecode=df['State'].map(us_state_abbrev))
df_copy.head()
## Global plotting / display configuration
sns.set(context='talk', style='darkgrid', font_scale=1)
plt.style.use('fivethirtyeight')
pd.options.display.max_columns = 500
plt.rcParams.update({'axes.unicode_minus': False})

# Font sizes and line width handed to the second seaborn theme call below.
rc = {
    'font.size': 16,
    'axes.labelsize': 17,
    'legend.fontsize': 16,
    'axes.titlesize': 16,
    'xtick.labelsize': 16,
    'ytick.labelsize': 17,
    "lines.linewidth": 2.5,
}
sns.set(context='poster', style='darkgrid', font_scale=1, palette='muted', rc=rc)
sns.set_style("darkgrid", {"font.sans-serif": ['simhei', 'Arial']})

# Ten-step "Blues" palette.
cmap = sns.color_palette("Blues", n_colors=10)
# Horizontal bars of 'Total Homeless' per state (ascending), each annotated
# with that bar's share of the sum of all bars.
by_total = df.sort_values('Total Homeless', ascending=True)[['State', 'Total Homeless']]
ax = by_total.plot.barh(
    x='State',
    legend=False,
    stacked=True,
    title='Total % Homeless by State',
    figsize=(17, 15),
)
ax.set_alpha(0.8)

# Bar widths are the plotted values; their sum is the label denominator.
totals = [bar.get_width() for bar in ax.patches]
total = sum(totals)

for bar in ax.patches:
    share = round((bar.get_width() / total) * 100, 2)
    # Anchor the text just past the end of the bar, at the bar's lower edge.
    ax.text(bar.get_width() + .4, bar.get_y(), str(share) + '%',
            fontsize=15, color='dimgrey')
plt.show();
# Horizontal bars of 'Total Homeless' per state (ascending), each annotated
# with the bar's value truncated to an integer.
by_total = df.sort_values('Total Homeless', ascending=True)[['State', 'Total Homeless']]
ax = by_total.plot.barh(
    x='State',
    legend=False,
    stacked=True,
    title='Total Homeless by State',
    cmap=plt.get_cmap('Spectral'),
    figsize=(17, 15),
)
ax.set_alpha(0.8)

# Collected bar widths and their sum (the sum is not used by the labels here).
totals = [bar.get_width() for bar in ax.patches]
total = sum(totals)

for bar in ax.patches:
    # Anchor the text just past the end of the bar, at the bar's lower edge.
    ax.text(bar.get_width() + .4, bar.get_y(), str(int(bar.get_width())),
            fontsize=15, color='dimgrey')
plt.show();
# States ordered by 'Percentage 1' (ascending) with a fresh 0..n-1 index.
dfpct = df.sort_values('Percentage 1').reset_index(drop=True)
dfpct

# Lollipop chart: a horizontal stem from 0 to the value plus a red dot.
fig, ax = plt.subplots(figsize=(16, 13))
pct_values = dfpct['Percentage 1']
ax.hlines(y=dfpct['State'], xmin=0, xmax=pct_values)
ax.plot(pct_values, dfpct['State'], "o", color='red')
# The visible x-range starts at the smallest value, not at 0.
ax.set_xlim(pct_values.min(), pct_values.max() + 0.001)

# Decorate
plt.title('Percentage of Homeless to the Total Population', fontdict={'size': 20});
plt.grid(linestyle='--', alpha=0.5);
plt.show();
# Default figure size and font sizes for the charts that follow.
params = {
    'legend.fontsize': '20',
    'figure.figsize': (15, 5),
    'axes.labelsize': '18',
    'axes.titlesize': '30',
    'xtick.labelsize': '16',
    'ytick.labelsize': '16',
}
plt.rcParams.update(params)

# Text, tick and axis-label colours.
plt.rcParams.update({
    'text.color': '#A04000',
    'xtick.color': '#800000',
    'ytick.color': '#808000',
    'axes.labelcolor': '#283747',
})

from decimal import Decimal

# Show floats with two decimal places when pandas prints them.
pd.set_option('display.float_format', '{:.2f}'.format)
# Vertical bars of 'Percentage 1' per state, largest first.
pct1_desc = df.sort_values('Percentage 1', ascending=False)[['State', 'Percentage 1']]
ax = pct1_desc.plot.bar(
    x='State',
    legend=False,
    stacked=True,
    title='Percentage of Homeless Households by Percentage 1 by State',
    color=[plt.cm.Spectral(np.arange(len(df.State)))],
    figsize=(17, 8),
    width=1,
)
plt.xticks(rotation=90)
plt.tight_layout()
plt.xlabel('State')
plt.ylabel('Percentage of Homeless by Population')

# Paint the two tallest bars black ...
for pos in (0, 1):
    ax.patches[pos].set_facecolor('#000000')
# ... and the bars at positions 28 through 49 yellow.
for pos in range(28, 50):
    ax.patches[pos].set_facecolor('#fff200')
plt.show();
# Vertical bars of 'Percentage 2 - USICH' per state, largest first.
pct2_desc = df.sort_values('Percentage 2 - USICH', ascending=False)[['State', 'Percentage 2 - USICH']]
ax = pct2_desc.plot.bar(
    x='State',
    legend=False,
    stacked=True,
    # The title previously said "Percentage 1" although this chart plots the
    # 'Percentage 2 - USICH' column.
    title='Percentage of Homeless Households by Percentage 2 by State (USICH)',
    color=[plt.cm.Spectral(np.arange(len(df.State)))],
    figsize=(17, 8),
    width=1,
)
plt.xticks(rotation=90)
plt.xlabel('State')
plt.ylabel('Percentage of Homeless by Population')

# Paint the two tallest bars black ...
for pos in (0, 1):
    ax.patches[pos].set_facecolor('#000000')
# ... and the bars at positions 28 through 49 yellow.
for pos in range(28, 50):
    ax.patches[pos].set_facecolor('#fff200')
plt.show();
def _plot_percentage_comparison(frame, *, logy, stacked):
    """Draw 'Percentage 2 - USICH' and 'Percentage 1' as bars per row of *frame*.

    Args:
        frame: DataFrame holding both percentage columns; its index supplies
            the x tick labels.
        logy: Passed to ``DataFrame.plot``; True selects a log-scaled y axis.
        stacked: Passed to ``DataFrame.plot``; True stacks the two series,
            False draws them side by side.
    """
    frame[['Percentage 2 - USICH', 'Percentage 1']].plot(
        kind='bar', figsize=(17, 7), width=1, logy=logy, stacked=stacked)
    plt.title('Percentage of Homeless Households by Percentages')
    plt.xticks(rotation=90)
    plt.xlabel('State')
    plt.ylabel('Percentage of Homeless by Population')
    plt.show()


# Index by state once; all four variants share the same frame.
df_pct_grp = df.set_index('State')

# (logy, stacked) for: log axis, linear axis, log + stacked, linear + stacked.
for use_log, is_stacked in ((True, False), (False, False), (True, True), (False, True)):
    _plot_percentage_comparison(df_pct_grp, logy=use_log, stacked=is_stacked)
# Default figure size and font sizes, with black text for the treemap.
params = {
    'legend.fontsize': '20',
    'figure.figsize': (15, 5),
    'axes.labelsize': '18',
    'axes.titlesize': '30',
    'xtick.labelsize': '16',
    'ytick.labelsize': '16',
}
plt.rcParams.update(params)
plt.rcParams['text.color'] = '#000000'
plt.rcParams['xtick.color'] = '#800000'
plt.rcParams['ytick.color'] = '#808000'
plt.rcParams['axes.labelcolor'] = '#283747'

# NOTE(review): this rebinds `df` from the DataFrame to a single Series and the
# new value is not used by this block; kept only so code outside this block
# sees the same `df` as before -- confirm whether it can be removed.
df = df_copy['Total Homeless']

labels = df_copy.Statecode
sizes = df_copy['Total Homeless']

# Tile labels are built positionally. The previous Series addition aligned the
# state codes (df_copy's index) with a fresh 0..n-1 index, which yields NaN or
# shifted labels whenever df_copy's index has gaps.
labels_ = [str(k) + 'k' for k in np.round(sizes.values / 1000, 2)]
final_labels = [f'{code}\n{size_k}' for code, size_k in zip(labels, labels_)]

# One Spectral colour per tile, evenly spaced over the colormap.
colors = [plt.cm.Spectral(i / float(len(labels))) for i in range(len(labels))]

# Draw Plot
plt.figure(figsize=(15, 9), dpi=300)
squarify.plot(sizes=sizes, label=final_labels, color=colors,
              bar_kwargs={'alpha': .9}, text_kwargs={'fontsize': 11})

# Decorate
plt.title('Treemap of Total Homeless')
plt.axis('off')
plt.show();
df_copy

# Side-by-side bars of 'Veterans' and 'Young Adults' per state, log y axis.
df_copy_grp = df_copy.set_index('State')
df_copy_grp[['Veterans', 'Young Adults']].plot.bar(
    figsize=(17, 9), stacked=False, logy=True, width=0.75);
# Bubble chart of 'Population' against 'Total Homeless' on log-log axes, with
# each point annotated by its state code.
plt.figure(figsize=(17, 10), dpi=300)
plt.style.use('fivethirtyeight')

# One rainbow colour per row. The count follows the data instead of a
# hard-coded 50, because scatter requires len(c) to match the point count.
colors = cm.rainbow(np.linspace(0, 1, len(df_copy)))

# `c` already holds explicit RGBA colours, so no colormap is passed.
# Marker area is the raw 'Total Homeless' value.
ax = plt.scatter(df_copy['Total Homeless'], df_copy['Population'],
                 s=df_copy['Total Homeless'], c=colors, alpha=0.1)
plt.ylabel('Population')
plt.xlabel('Total Homeless')
plt.title('Population versus Total Homeless')

for label, x, y in zip(df_copy['Statecode'], df_copy['Total Homeless'], df_copy['Population']):
    # Offset the text box up and to the left, with an arrow back to the point.
    plt.annotate(
        label,
        xy=(x, y), xytext=(-20, 20),
        textcoords='offset points', ha='right', va='bottom',
        bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.2),
        arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

plt.yscale('log')
plt.xscale('log')
plt.show();
# Horizontal orange bars of 'Homeless Households' per state, ascending.
households_sorted = df_copy.sort_values('Homeless Households', ascending=True)[['State', 'Homeless Households']]
households_sorted.plot.barh(
    x='State',
    legend=False,
    stacked=True,
    title='Homeless Households by State',
    color='orange',
    figsize=(17, 14),
);
plt.xticks(rotation=60)
plt.show();
If you want to see the variation in a single metric and visualize the order and size of that variance, diverging bars are a great tool. They help to quickly differentiate the performance of items (for example, State in this case), are quite intuitive, and instantly convey the point.
# Diverging bars: standardise 'Homeless Households' (subtract the mean, divide
# by the sample standard deviation) and draw one horizontal line per state.
households = df_copy['Homeless Households']
# Despite its name, this column holds the standardised score, not a mean.
df_copy['mean Homeless Households'] = (households - households.mean()) / households.std()

# NOTE(review): red marks below-average states here, while the
# 'Total Homeless' chart later in this file colours above-average states red
# -- confirm which mapping is intended.
df_copy['colors'] = ['red' if score < 0 else 'green'
                     for score in df_copy['mean Homeless Households']]

# Order the rows by score and renumber them so the index doubles as y position.
df_copy.sort_values('mean Homeless Households', inplace=True)
df_copy = df_copy.reset_index(drop=True)

# Draw plot
plt.figure(figsize=(14, 10), dpi=300)
plt.hlines(y=df_copy.index, xmin=0, xmax=df_copy['mean Homeless Households'],
           color=df_copy.colors, alpha=0.4, linewidth=5)

# Decorations
# Mathtext ignores plain spaces, so the gap is written as an explicit '\ '.
plt.gca().set(ylabel='$State$', xlabel=r'$Homeless\ Households$')
plt.yticks(df_copy.index, df_copy.State, fontsize=12)
plt.title('Homeless Households normalised to the average Values', fontdict={'size': 20})
plt.grid(linestyle='--', alpha=0.5)
plt.show();
# Diverging bars: standardise 'Total Homeless' (subtract the mean, divide by
# the sample standard deviation) and draw one horizontal line per state.
total_homeless = df_copy['Total Homeless']
# Despite its name, this column holds the standardised score, not a mean.
df_copy['mean Total Homeless'] = (total_homeless - total_homeless.mean()) / total_homeless.std()

# NOTE(review): red marks above-average states here, while the
# 'Homeless Households' chart earlier in this file colours below-average
# states red -- confirm which mapping is intended.
df_copy['colors'] = ['red' if score > 0 else 'green'
                     for score in df_copy['mean Total Homeless']]

# Order the rows by score and renumber them so the index doubles as y position.
df_copy.sort_values('mean Total Homeless', inplace=True)
df_copy = df_copy.reset_index(drop=True)

# Draw plot
plt.figure(figsize=(14, 10), dpi=300)
plt.hlines(y=df_copy.index, xmin=0, xmax=df_copy['mean Total Homeless'],
           color=df_copy.colors, alpha=0.4, linewidth=5)

# Decorations
# Mathtext ignores plain spaces, so the gap is written as an explicit '\ '.
plt.gca().set(ylabel='$State$', xlabel=r'$Total\ Homeless$')
plt.yticks(df_copy.index, df_copy.State, fontsize=12)
plt.title('Total Homeless normalised to the average Values', fontdict={'size': 20})
plt.grid(linestyle='--', alpha=0.5)
plt.show();